suppressPackageStartupMessages(library(tidyverse))
devtools::load_all('~/Google Drive/My Drive/Scripts/R_packages/myUtilities/')
## ℹ Loading myUtilities

Settings

# Project root; the table and figure directories below are built from it.
wd <- "/Users/s-mitsutomi/Google Drive/My Drive/Analysis/METTL2A/"
setwd(wd)

# Raw-data volume and the two table output directories.
data_dir <- '/Volumes/Mitsu_NGS_3/METTL2A/'
tabledir_DRS_genome <- str_c(wd, 'Tables/DRS_m3C_sites/Genomic/')
tabledir_intersect <- str_c(wd, 'Tables/AlkAnilineSeq/Intersection/')

# Figure output directory for this analysis.
figdir <- str_c(wd, 'Figures/AAS_DRS_interaction/')

# Alternative output locations, currently disabled:
#figdir <- paste0(wd, 'Figures/DRS_m3C_sites/Metagene/')
#tabledir <- paste0(wd, 'Tables/DRS_m3C_sites/')

# Session-wide ggplot2 theme: classic look, 7 pt base size, legend at bottom.
theme_set(theme_classic(base_size = 7) + theme(legend.position = 'bottom'))

Read data

# Load the table of total cleaved read counts (one row per position).
# Column types are guessed by readr from the file contents.
total_cleaved_reads <- read_tsv(
  file = paste0(wd, 'Tables/AlkAnilineSeq/total_cleaved_reads_2024-04-02.tsv')
)
## Rows: 617758 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (6): transcript_id, transcript_name, transcript_type, chr, strand, base
## dbl (2): pos, total_cleaved_reads
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the loaded table (auto-printed).
total_cleaved_reads
## # A tibble: 617,758 × 8
##    transcript_id     transcript_name transcript_type chr        pos strand base 
##    <chr>             <chr>           <chr>           <chr>    <dbl> <chr>  <chr>
##  1 nbis-gene-151     <NA>            <NA>            chr6    2.66e7 +      G    
##  2 nbis-gene-648     <NA>            <NA>            chr19   3.94e7 -      G    
##  3 nbis-gene-591     <NA>            <NA>            chr12   9.85e7 +      G    
##  4 nbis-gene-503     <NA>            <NA>            chr12   1.25e8 +      G    
##  5 nbis-gene-150     <NA>            <NA>            chr6    2.66e7 +      G    
##  6 ENST00000387441.1 MT-TH-201       Mt_tRNA         chrM    1.22e4 +      G    
##  7 nbis-gene-92      <NA>            <NA>            chr17   8.12e6 +      G    
##  8 nbis-gene-640     <NA>            <NA>            chr1    2.05e8 +      G    
##  9 nbis-gene-450     <NA>            <NA>            chr14   2.06e7 -      G    
## 10 nbis-gene-555     <NA>            <NA>            chr5    1.81e8 +      G    
## # ℹ 617,748 more rows
## # ℹ 1 more variable: total_cleaved_reads <dbl>
# Number of rows, and their share in percent, for each value of `base`.
count(total_cleaved_reads, base) |>
  mutate(percent = 100 * n / sum(n))
## # A tibble: 5 × 3
##   base       n   percent
##   <chr>  <int>     <dbl>
## 1 A     216328 35.0     
## 2 C      89172 14.4     
## 3 G     172802 28.0     
## 4 N          3  0.000486
## 5 T     139453 22.6
# Restrict the table to rows whose `base` is 'C', then preview the result.
reads_cleaved_at_C <- filter(total_cleaved_reads, base == 'C')
reads_cleaved_at_C
## # A tibble: 89,172 × 8
##    transcript_id     transcript_name transcript_type chr        pos strand base 
##    <chr>             <chr>           <chr>           <chr>    <dbl> <chr>  <chr>
##  1 ENST00000651540.1 ENST00000651540 lncRNA          chr7    1.49e8 +      C    
##  2 ENST00000363286.1 RNU5B-1-201     snRNA           chr15   6.53e7 +      C    
##  3 nbis-gene-416     <NA>            <NA>            chr17   3.16e7 +      C    
##  4 nbis-gene-24      <NA>            <NA>            chr6    2.75e7 +      C    
##  5 nbis-gene-367     <NA>            <NA>            chr16   1.43e7 +      C    
##  6 ENST00000387460.2 MT-TT-201       Mt_tRNA         chrM    1.59e4 +      C    
##  7 .                 <NA>            <NA>            chr19   5.75e7 -      C    
##  8 ENST00000387377.1 MT-TM-201       Mt_tRNA         chrM    4.43e3 +      C    
##  9 .                 <NA>            <NA>            chr3    4.86e7 +      C    
## 10 nbis-gene-106     <NA>            <NA>            chr17   8.14e6 -      C    
## # ℹ 89,162 more rows
## # ℹ 1 more variable: total_cleaved_reads <dbl>
# Density of total cleaved reads across C rows, drawn on a log10 x axis.
ggplot(reads_cleaved_at_C, aes(x = total_cleaved_reads)) +
  geom_density() +
  scale_x_log10()

Read m3C sites identified in DRS

# m3C sites from the BED12 file, reduced to the columns needed downstream and
# renamed (chrom -> chr, start -> pos) so they share key names with the
# cleaved-read table. Every row is flagged m3C_DRS = TRUE so that, after a
# join, unmatched rows can be recognised by NA in that column.
# `name` is deliberately left as-is (not renamed to transcript_id).
# NOTE(review): `pos` is taken from the BED `start` column; if the BED file is
# 0-based while the cleaved-read positions are 1-based, the join key would be
# off by one — confirm the coordinate convention of both tables.
m3C_sites_gppy <-
  read_bed12(paste0(wd, 'Tables/m3C_sites_gppy.bed')) |>
  select(chr = chrom, pos = start, end, name, strand) |>
  mutate(m3C_DRS = TRUE)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
# Write the site table, gzip-compressed, to the genomic DRS table directory.
export_tsv(m3C_sites_gppy, outdir = tabledir_DRS_genome, compression = 'gz')
## 
## Exported to: /Users/s-mitsutomi/Google Drive/My Drive/Analysis/METTL2A/Tables/DRS_m3C_sites/Genomic/m3C_sites_gppy_2025-07-18.tsv.gz
## # A tibble: 489 × 6
##    chr         pos       end name               strand m3C_DRS
##    <chr>     <dbl>     <dbl> <chr>              <chr>  <lgl>  
##  1 chr8   56073701  56073702 ENST00000009589.8  -      TRUE   
##  2 chr19  41771316  41771317 ENST00000199764.7  +      TRUE   
##  3 chr12 112408263 112408264 ENST00000202773.14 -      TRUE   
##  4 chr12 112406861 112406862 ENST00000202773.14 -      TRUE   
##  5 chr22  23894563  23894564 ENST00000215754.8  +      TRUE   
##  6 chr22  23894574  23894575 ENST00000215754.8  +      TRUE   
##  7 chr22  23895151  23895152 ENST00000215754.8  +      TRUE   
##  8 chr22  23894462  23894463 ENST00000215754.8  +      TRUE   
##  9 chr12   6536569   6536570 ENST00000229239.10 +      TRUE   
## 10 chr12   6537764   6537765 ENST00000229239.10 +      TRUE   
## # ℹ 479 more rows
# Transcript types removed before intersecting cleaved sites with DRS sites.
# The leading NA also removes rows that carry no transcript type at all.
excluded_rnatype <- c(
  NA_character_,
  "lncRNA",
  "snRNA",
  "snoRNA",
  "nonsense_mediated_decay",
  "miRNA",
  "Mt_tRNA",
  "rRNA_pseudogene",
  "ribozyme",
  "misc_RNA",
  "rRNA",
  "transcribed_unprocessed_pseudogene",
  "protein_coding_CDS_not_defined"
)

#' Overlap between cleaved C sites and DRS m3C sites at one read threshold
#'
#' Keeps the cleaved C rows with strictly more than `cleave_thresh` total
#' cleaved reads, left-joins the DRS m3C sites on genomic position
#' (chr, pos, strand), drops rows whose `transcript_type` is excluded, and
#' tabulates how many remaining rows did / did not match a DRS site.
#'
#' The join keys are given explicitly so that the result cannot change
#' silently if the two tables ever gain another shared column name, and so
#' that no "Joining with ..." message is printed on every call.
#'
#' @param cleave_thresh Single number; rows need `total_cleaved_reads`
#'   strictly greater than this value.
#' @param cleaved_sites Table of cleaved C positions with columns `chr`,
#'   `pos`, `strand`, `transcript_type`, `total_cleaved_reads`. Defaults to
#'   the script-level `reads_cleaved_at_C`.
#' @param drs_sites Table of DRS m3C sites with columns `chr`, `pos`,
#'   `strand`, `m3C_DRS`. Defaults to the script-level `m3C_sites_gppy`.
#' @param excluded_types Transcript types to drop (may contain `NA`).
#'   Defaults to the script-level `excluded_rnatype`.
#' @return A tibble with one row per value of `m3C_DRS` (`TRUE` for matched
#'   rows, `NA` for unmatched ones) and columns `n`, `percent`,
#'   `cleave_thresh`.
check_cleaved_reads_thresh <- function(
    cleave_thresh,
    cleaved_sites = reads_cleaved_at_C,
    drs_sites = m3C_sites_gppy,
    excluded_types = excluded_rnatype
) {
  cleaved_sites |>
    filter(total_cleaved_reads > cleave_thresh) |>
    left_join(drs_sites, by = join_by(chr, pos, strand)) |>
    filter(!(transcript_type %in% excluded_types)) |>
    count(m3C_DRS) |>
    mutate(
      percent = 100 * n / sum(n),
      cleave_thresh = cleave_thresh
    )
}

# Scan read thresholds 1..100: compute the overlap summary for each, stack the
# per-threshold tables in one bind_rows() call (instead of pairwise reduce,
# which re-copies the growing result at every step), keep only the rows for
# sites matched to a DRS m3C site, and sort by descending overlap percentage.
overlap_AAS_DRS_threshold <-
  seq(1, 100, 1) |>
  map(check_cleaved_reads_thresh) |>
  bind_rows() |>
  filter(m3C_DRS) |>
  arrange(desc(percent))
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
## Joining with `by = join_by(chr, pos, strand)`
# Write the threshold scan to the intersection table directory.
export_tsv(overlap_AAS_DRS_threshold, outdir = tabledir_intersect)
## 
## Exported to: /Users/s-mitsutomi/Google Drive/My Drive/Analysis/METTL2A/Tables/AlkAnilineSeq/Intersection/overlap_AAS_DRS_threshold_2025-07-18.tsv
## # A tibble: 100 × 4
##    m3C_DRS     n percent cleave_thresh
##    <lgl>   <int>   <dbl>         <dbl>
##  1 TRUE        9    5.56            68
##  2 TRUE        9    5.52            67
##  3 TRUE        9    5.42            66
##  4 TRUE        9    5.29            65
##  5 TRUE        9    5.17            64
##  6 TRUE        8    5.16            71
##  7 TRUE        9    5.11            62
##  8 TRUE        9    5.11            63
##  9 TRUE        7    5.11            79
## 10 TRUE        7    5.07            78
## # ℹ 90 more rows
# Show only the thresholds that are multiples of 10.
filter(overlap_AAS_DRS_threshold, cleave_thresh %% 10 == 0)
## # A tibble: 10 × 4
##    m3C_DRS     n percent cleave_thresh
##    <lgl>   <int>   <dbl>         <dbl>
##  1 TRUE        8    5.06            70
##  2 TRUE        9    4.92            60
##  3 TRUE       10    4.48            50
##  4 TRUE        6    4.41            80
##  5 TRUE        5    4.27            90
##  6 TRUE       11    4.04            40
##  7 TRUE        4    3.85           100
##  8 TRUE       23    3.67            10
##  9 TRUE       16    3.66            20
## 10 TRUE       12    3.61            30

Total cleaved reads ≥ 50

Total 6,237 sites

# Positions (all bases) with at least 50 total cleaved reads, most-cleaved
# first, with a leading `transcript_type2` column.
# NOTE(review): the cutoff here is inclusive (>= 50), whereas
# check_cleaved_reads_thresh() uses a strict `>` — confirm which is intended.
total_cleaved_reads_morethan50 <-
  total_cleaved_reads |>
  filter(total_cleaved_reads >= 50) |>
  mutate(
    # IDs containing 'nbis-gene-' are labelled 'tRNA'; every other row keeps
    # its original transcript_type (possibly NA).
    transcript_type2 = case_when(
      grepl('nbis-gene-', transcript_id) ~ 'tRNA',
      .default = transcript_type
    )
  ) |>
  relocate(transcript_type2) |>
  arrange(desc(total_cleaved_reads))
total_cleaved_reads_morethan50
## # A tibble: 6,237 × 9
##    transcript_type2 transcript_id   transcript_name transcript_type chr      pos
##    <chr>            <chr>           <chr>           <chr>           <chr>  <dbl>
##  1 tRNA             nbis-gene-151   <NA>            <NA>            chr6  2.66e7
##  2 tRNA             nbis-gene-648   <NA>            <NA>            chr19 3.94e7
##  3 tRNA             nbis-gene-591   <NA>            <NA>            chr12 9.85e7
##  4 tRNA             nbis-gene-503   <NA>            <NA>            chr12 1.25e8
##  5 tRNA             nbis-gene-150   <NA>            <NA>            chr6  2.66e7
##  6 Mt_tRNA          ENST0000038744… MT-TH-201       Mt_tRNA         chrM  1.22e4
##  7 tRNA             nbis-gene-92    <NA>            <NA>            chr17 8.12e6
##  8 tRNA             nbis-gene-640   <NA>            <NA>            chr1  2.05e8
##  9 tRNA             nbis-gene-450   <NA>            <NA>            chr14 2.06e7
## 10 tRNA             nbis-gene-555   <NA>            <NA>            chr5  1.81e8
## # ℹ 6,227 more rows
## # ℹ 3 more variables: strand <chr>, base <chr>, total_cleaved_reads <dbl>
# Keep only rows that have a transcript_type2 label, then preview.
total_cleaved_reads_morethan50_annotated <-
  drop_na(total_cleaved_reads_morethan50, transcript_type2)
total_cleaved_reads_morethan50_annotated
## # A tibble: 4,566 × 9
##    transcript_type2 transcript_id   transcript_name transcript_type chr      pos
##    <chr>            <chr>           <chr>           <chr>           <chr>  <dbl>
##  1 tRNA             nbis-gene-151   <NA>            <NA>            chr6  2.66e7
##  2 tRNA             nbis-gene-648   <NA>            <NA>            chr19 3.94e7
##  3 tRNA             nbis-gene-591   <NA>            <NA>            chr12 9.85e7
##  4 tRNA             nbis-gene-503   <NA>            <NA>            chr12 1.25e8
##  5 tRNA             nbis-gene-150   <NA>            <NA>            chr6  2.66e7
##  6 Mt_tRNA          ENST0000038744… MT-TH-201       Mt_tRNA         chrM  1.22e4
##  7 tRNA             nbis-gene-92    <NA>            <NA>            chr17 8.12e6
##  8 tRNA             nbis-gene-640   <NA>            <NA>            chr1  2.05e8
##  9 tRNA             nbis-gene-450   <NA>            <NA>            chr14 2.06e7
## 10 tRNA             nbis-gene-555   <NA>            <NA>            chr5  1.81e8
## # ℹ 4,556 more rows
## # ℹ 3 more variables: strand <chr>, base <chr>, total_cleaved_reads <dbl>

Base composition

All

# Row count and percentage per base among all thresholded positions.
total_cleaved_reads_morethan50_basecomposition <-
  count(total_cleaved_reads_morethan50, base) |>
  mutate(percent = 100 * n / sum(n))
total_cleaved_reads_morethan50_basecomposition
## # A tibble: 5 × 3
##   base      n percent
##   <chr> <int>   <dbl>
## 1 A      1797 28.8   
## 2 C      1108 17.8   
## 3 G      1855 29.7   
## 4 N         1  0.0160
## 5 T      1476 23.7
# Single horizontal stacked bar of base percentages; segments are ordered by
# descending count via reorder(-n).
# Colours are given as a NAMED vector so each base always gets the same colour.
# With unnamed values, ggplot2 assigns colours in factor-level order, i.e. by
# count rank here, so a base's colour depends on the data and differs between
# figures.
# NOTE(review): assumes the palette was written in A, C, G, T, N order
# (green, blue, yellow, red, gray) — confirm.
total_cleaved_reads_morethan50_basecomposition_barplot <-
  total_cleaved_reads_morethan50_basecomposition |>
  ggplot(aes(x = '', y = percent, fill = base |> reorder(-n))) +
  geom_bar(stat = 'identity', position = 'stack') +
  scale_fill_manual(
    values = c(
      A = '#01C001', C = '#5051FF', G = '#E6E602', T = '#E00800', N = 'gray'
    )
  ) +
  coord_flip()
# Save the figure as PDF in the figure directory.
total_cleaved_reads_morethan50_basecomposition_barplot |>
  ggsave_pdf(
    width = 4, height = 2.5, outdir = figdir
  )

Annotated sites (excluding intergenic regions)

# Row count and percentage per base among the annotated thresholded positions.
total_cleaved_reads_morethan50_annotated_basecomposition <-
  count(total_cleaved_reads_morethan50_annotated, base) |>
  mutate(percent = 100 * n / sum(n))
total_cleaved_reads_morethan50_annotated_basecomposition
## # A tibble: 5 × 3
##   base      n percent
##   <chr> <int>   <dbl>
## 1 A      1363 29.9   
## 2 C       790 17.3   
## 3 G      1363 29.9   
## 4 N         1  0.0219
## 5 T      1049 23.0
# Single horizontal stacked bar of base percentages for annotated positions;
# segments are ordered by descending count via reorder(-n).
# Colours are given as a NAMED vector so each base always gets the same colour.
# With unnamed values, ggplot2 assigns colours in factor-level order, i.e. by
# count rank here, so a base's colour depends on the data (including how ties
# in `n` are broken) and differs between figures.
# NOTE(review): assumes the palette was written in A, C, G, T, N order
# (green, blue, yellow, red, gray) — confirm.
total_cleaved_reads_morethan50_annotated_basecomposition_barplot <-
  total_cleaved_reads_morethan50_annotated_basecomposition |>
  ggplot(aes(x = '', y = percent, fill = base |> reorder(-n))) +
  geom_bar(stat = 'identity', position = 'stack') +
  scale_fill_manual(
    values = c(
      A = '#01C001', C = '#5051FF', G = '#E6E602', T = '#E00800', N = 'gray'
    )
  ) +
  coord_flip()
# Save the figure as PDF in the figure directory.
total_cleaved_reads_morethan50_annotated_basecomposition_barplot |>
  ggsave_pdf(
    width = 4, height = 2.5, outdir = figdir
  )

# C positions among the annotated, thresholded sites; exported gzip-compressed.
cleaved_C_enough_reads_50 <-
  filter(total_cleaved_reads_morethan50_annotated, base == 'C')
export_tsv(
  cleaved_C_enough_reads_50,
  outdir = tabledir_intersect, compression = 'gz'
)
## 
## Exported to: /Users/s-mitsutomi/Google Drive/My Drive/Analysis/METTL2A/Tables/AlkAnilineSeq/Intersection/cleaved_C_enough_reads_50_2025-07-18.tsv.gz
## # A tibble: 790 × 9
##    transcript_type2 transcript_id   transcript_name transcript_type chr      pos
##    <chr>            <chr>           <chr>           <chr>           <chr>  <dbl>
##  1 lncRNA           ENST0000065154… ENST00000651540 lncRNA          chr7  1.49e8
##  2 snRNA            ENST0000036328… RNU5B-1-201     snRNA           chr15 6.53e7
##  3 tRNA             nbis-gene-416   <NA>            <NA>            chr17 3.16e7
##  4 tRNA             nbis-gene-24    <NA>            <NA>            chr6  2.75e7
##  5 tRNA             nbis-gene-367   <NA>            <NA>            chr16 1.43e7
##  6 Mt_tRNA          ENST0000038746… MT-TT-201       Mt_tRNA         chrM  1.59e4
##  7 Mt_tRNA          ENST0000038737… MT-TM-201       Mt_tRNA         chrM  4.43e3
##  8 tRNA             nbis-gene-106   <NA>            <NA>            chr17 8.14e6
##  9 tRNA             nbis-gene-150   <NA>            <NA>            chr6  2.66e7
## 10 tRNA             nbis-gene-290   <NA>            <NA>            chr12 5.62e7
## # ℹ 780 more rows
## # ℹ 3 more variables: strand <chr>, base <chr>, total_cleaved_reads <dbl>
# Count and percentage of cleaved C rows per transcript_type2, largest first.
count(cleaved_C_enough_reads_50, transcript_type2) |>
  mutate(percent = 100 * n / sum(n)) |>
  arrange(desc(percent))
## # A tibble: 15 × 3
##    transcript_type2                       n percent
##    <chr>                              <int>   <dbl>
##  1 Mt_rRNA                              202  25.6  
##  2 rRNA_pseudogene                      185  23.4  
##  3 tRNA                                 184  23.3  
##  4 snRNA                                 55   6.96 
##  5 lncRNA                                28   3.54 
##  6 misc_RNA                              28   3.54 
##  7 Mt_tRNA                               26   3.29 
##  8 rRNA                                  24   3.04 
##  9 protein_coding                        23   2.91 
## 10 snoRNA                                17   2.15 
## 11 ribozyme                              10   1.27 
## 12 miRNA                                  3   0.380
## 13 nonsense_mediated_decay                3   0.380
## 14 protein_coding_CDS_not_defined         1   0.127
## 15 transcribed_unprocessed_pseudogene     1   0.127
# Horizontal violins of total cleaved reads (log10 scale) per transcript_type2,
# with types ordered by their summed total_cleaved_reads.
ggplot(
  cleaved_C_enough_reads_50,
  aes(
    x = reorder(transcript_type2, total_cleaved_reads, sum),
    y = total_cleaved_reads
  )
) +
  geom_violin() +
  scale_y_log10() +
  coord_flip()
## Warning: Groups with fewer than two datapoints have been dropped.
## ℹ Set `drop = FALSE` to consider such groups for position adjustment purposes.
## Groups with fewer than two datapoints have been dropped.
## ℹ Set `drop = FALSE` to consider such groups for position adjustment purposes.

# Full join of the retained cleaved C sites with the DRS m3C sites, so rows
# found by only one of the two methods are kept as well (m3C_DRS is NA for
# cleaved-only rows; the cleaved-read columns are NA for DRS-only rows).
# The filter uses `transcript_type`, so rows without a transcript type are
# dropped too because NA is listed in excluded_rnatype.
# Join keys are explicit so the result cannot change silently if the two
# tables ever gain another shared column name.
merge_DRS_cleaved_C_enough_reads_50 <-
  cleaved_C_enough_reads_50 |>
  filter(
    !(transcript_type %in% excluded_rnatype)
  ) |>
  full_join(m3C_sites_gppy, by = join_by(chr, pos, strand))
## Joining with `by = join_by(chr, pos, strand)`
# Write the merged table, gzip-compressed, to the intersection directory.
export_tsv(
  merge_DRS_cleaved_C_enough_reads_50,
  outdir = tabledir_intersect, compression = 'gz'
)
## 
## Exported to: /Users/s-mitsutomi/Google Drive/My Drive/Analysis/METTL2A/Tables/AlkAnilineSeq/Intersection/merge_DRS_cleaved_C_enough_reads_50_2025-07-18.tsv.gz
## # A tibble: 704 × 12
##    transcript_type2 transcript_id    transcript_name transcript_type chr     pos
##    <chr>            <chr>            <chr>           <chr>           <chr> <dbl>
##  1 Mt_rRNA          ENST00000387347… MT-RNR2-201     Mt_rRNA         chrM   2240
##  2 Mt_rRNA          ENST00000389680… MT-RNR1-201     Mt_rRNA         chrM   1011
##  3 Mt_rRNA          ENST00000389680… MT-RNR1-201     Mt_rRNA         chrM   1009
##  4 Mt_rRNA          ENST00000387347… MT-RNR2-201     Mt_rRNA         chrM   1827
##  5 Mt_rRNA          ENST00000387347… MT-RNR2-201     Mt_rRNA         chrM   2347
##  6 Mt_rRNA          ENST00000389680… MT-RNR1-201     Mt_rRNA         chrM   1508
##  7 Mt_rRNA          ENST00000387347… MT-RNR2-201     Mt_rRNA         chrM   2340
##  8 Mt_rRNA          ENST00000389680… MT-RNR1-201     Mt_rRNA         chrM   1511
##  9 Mt_rRNA          ENST00000387347… MT-RNR2-201     Mt_rRNA         chrM   1817
## 10 Mt_rRNA          ENST00000389680… MT-RNR1-201     Mt_rRNA         chrM   1001
## # ℹ 694 more rows
## # ℹ 6 more variables: strand <chr>, base <chr>, total_cleaved_reads <dbl>,
## #   end <dbl>, name <chr>, m3C_DRS <lgl>
# Distinct transcript_type2 values present in the merged table.
merge_DRS_cleaved_C_enough_reads_50 |>
  pull(transcript_type2) |>
  unique()
## [1] "Mt_rRNA"        "protein_coding" NA
# Rows per combination of DRS flag and transcript type.
count(merge_DRS_cleaved_C_enough_reads_50, m3C_DRS, transcript_type)
## # A tibble: 4 × 3
##   m3C_DRS transcript_type     n
##   <lgl>   <chr>           <int>
## 1 TRUE    Mt_rRNA            10
## 2 TRUE    <NA>              479
## 3 NA      Mt_rRNA           192
## 4 NA      protein_coding     23
# Same tabulation restricted to rows on chrM.
merge_DRS_cleaved_C_enough_reads_50 |>
  filter(chr == 'chrM') |>
  count(m3C_DRS, transcript_type)
## # A tibble: 4 × 3
##   m3C_DRS transcript_type     n
##   <lgl>   <chr>           <int>
## 1 TRUE    Mt_rRNA            10
## 2 TRUE    <NA>              219
## 3 NA      Mt_rRNA           192
## 4 NA      protein_coding      2